E-commerce, the electronic buying and selling of products over the internet, is a trending business sector. This fast-growing industry has also adopted features such as online payments, internet marketing, doorstep delivery, returns and exchanges, etc. Since it is a widespread industry that adds to human convenience and luxury, there is a lot of scope for improvement and business growth, which benefits both consumers and the industry.
This is a public Brazilian e-commerce dataset of orders made at Olist Store. The dataset has information on 100k orders placed from 2016 to 2018 at multiple marketplaces in Brazil. It contains multiple tables providing information on actual e-commerce purchases, along with various other dimensions of a particular purchase such as customer information, seller information, product metadata, and customer reviews of the purchase experience. The dataset also has a geolocation file that relates Brazilian zip codes to latitude/longitude coordinates.
# Split the purchase timestamp on the space into a date part and a time part.
ordergeotable <- separate(
  ordergeotable,
  order_purchase_timestamp,
  into = c("purchasedate", "purchasetime"),
  sep = " "
)

# Split the date part on "-" into three pieces. The names assume the
# timestamp is formatted as "YYYY-MM-DD HH:MM:SS", in which case "date"
# holds the day of the month -- TODO confirm against the raw data.
ordergeotable <- separate(
  ordergeotable,
  purchasedate,
  into = c("year", "month", "date"),
  sep = "-"
)

# Keep only the rows that have no missing value in any column.
geo_data <- na.omit(ordergeotable)

# Load the 2018 boundaries for all states (presumably geobr::read_state).
# showProgress = FALSE keeps the per-file download progress bars out of the
# console and out of the rendered report.
state <- read_state(code_state = "all", year = 2018, showProgress = FALSE)
##
|
| | 0%
|
|= | 1%
|
|=== | 4%
|
|===== | 7%
|
|===== | 8%
|
|====== | 10%
|
|======= | 11%
|
|======== | 13%
|
|========== | 16%
|
|============ | 18%
|
|============= | 20%
|
|=============== | 23%
|
|================== | 27%
|
|=================== | 29%
|
|====================== | 34%
|
|======================== | 37%
|
|=========================== | 42%
|
|============================ | 43%
|
|================================= | 50%
|
|================================== | 53%
|
|=================================== | 54%
|
|========================================== | 65%
|
|============================================ | 67%
|
|================================================= | 76%
|
|=================================================== | 79%
|
|==================================================== | 80%
|
|========================================================== | 89%
|
|=========================================================== | 90%
|
|=============================================================== | 98%
|
|================================================================ | 99%
|
|=================================================================| 100%
##
|
| | 0%
|
|= | 1%
|
|==== | 7%
|
|====== | 9%
|
|======= | 11%
|
|========= | 14%
|
|========== | 15%
|
|============ | 19%
|
|============= | 20%
|
|=============== | 22%
|
|================ | 25%
|
|================= | 26%
|
|================== | 28%
|
|=================== | 30%
|
|==================== | 31%
|
|====================== | 34%
|
|======================= | 35%
|
|======================= | 36%
|
|========================= | 38%
|
|========================== | 40%
|
|============================ | 43%
|
|============================== | 47%
|
|=============================== | 47%
|
|================================ | 49%
|
|================================= | 51%
|
|================================== | 53%
|
|==================================== | 56%
|
|===================================== | 56%
|
|===================================== | 57%
|
|====================================== | 59%
|
|======================================= | 60%
|
|======================================== | 62%
|
|========================================== | 64%
|
|========================================== | 65%
|
|=========================================== | 66%
|
|============================================== | 71%
|
|=============================================== | 72%
|
|================================================ | 74%
|
|================================================== | 76%
|
|==================================================== | 79%
|
|==================================================== | 81%
|
|===================================================== | 81%
|
|======================================================= | 85%
|
|========================================================== | 89%
|
|========================================================== | 90%
|
|=========================================================== | 90%
|
|=========================================================== | 91%
|
|============================================================= | 94%
|
|=============================================================== | 97%
|
|=================================================================| 100%
##
|
| | 0%
|
|== | 2%
|
|=========== | 16%
|
|================= | 26%
|
|================== | 28%
|
|===================== | 33%
|
|============================ | 43%
|
|============================= | 45%
|
|================================== | 53%
|
|====================================== | 59%
|
|======================================= | 60%
|
|=========================================== | 67%
|
|============================================= | 69%
|
|================================================ | 74%
|
|=================================================== | 78%
|
|====================================================== | 83%
|
|========================================================== | 89%
|
|================================================================ | 98%
|
|=================================================================| 100%
##
|
| | 0%
|
|== | 2%
|
|======== | 13%
|
|=============== | 23%
|
|================= | 27%
|
|================== | 28%
|
|===================== | 32%
|
|====================== | 34%
|
|======================== | 37%
|
|=========================== | 42%
|
|============================== | 46%
|
|================================== | 52%
|
|====================================== | 58%
|
|======================================== | 62%
|
|========================================== | 65%
|
|============================================ | 67%
|
|================================================== | 77%
|
|======================================================= | 85%
|
|========================================================== | 89%
|
|=========================================================== | 90%
|
|=================================================================| 100%
##
|
| | 0%
|
|= | 1%
|
|==== | 6%
|
|====== | 9%
|
|======= | 10%
|
|========= | 13%
|
|=========== | 16%
|
|============ | 18%
|
|============= | 20%
|
|=============== | 23%
|
|================= | 25%
|
|================= | 26%
|
|================== | 28%
|
|===================== | 32%
|
|======================= | 35%
|
|======================== | 37%
|
|========================= | 38%
|
|=========================== | 42%
|
|============================= | 44%
|
|============================== | 46%
|
|=============================== | 47%
|
|================================ | 49%
|
|================================ | 50%
|
|=================================== | 54%
|
|=================================== | 55%
|
|===================================== | 58%
|
|====================================== | 59%
|
|======================================= | 61%
|
|========================================= | 63%
|
|========================================= | 64%
|
|=========================================== | 66%
|
|============================================ | 68%
|
|============================================= | 70%
|
|=============================================== | 72%
|
|=============================================== | 73%
|
|================================================= | 75%
|
|================================================== | 77%
|
|==================================================== | 80%
|
|===================================================== | 82%
|
|====================================================== | 83%
|
|======================================================== | 86%
|
|========================================================== | 89%
|
|=========================================================== | 91%
|
|============================================================= | 93%
|
|=============================================================== | 97%
|
|================================================================ | 98%
|
|=================================================================| 100%
##
|
| | 0%
|
|= | 1%
|
|======= | 10%
|
|============== | 22%
|
|================== | 27%
|
|==================== | 31%
|
|====================== | 34%
|
|========================= | 39%
|
|============================= | 44%
|
|============================== | 47%
|
|================================ | 49%
|
|==================================== | 56%
|
|===================================== | 57%
|
|======================================== | 62%
|
|============================================== | 70%
|
|================================================= | 75%
|
|=================================================== | 78%
|
|=================================================== | 79%
|
|==================================================== | 80%
|
|======================================================== | 87%
|
|=========================================================== | 91%
|
|============================================================= | 93%
|
|=================================================================| 100%
##
|
| | 0%
|
|= | 2%
|
|========== | 15%
|
|=================== | 28%
|
|===================== | 33%
|
|====================== | 34%
|
|=========================== | 42%
|
|=============================== | 48%
|
|==================================== | 56%
|
|====================================== | 58%
|
|=========================================== | 65%
|
|============================================= | 69%
|
|============================================== | 71%
|
|=================================================== | 78%
|
|======================================================= | 84%
|
|============================================================ | 92%
|
|============================================================== | 95%
|
|=================================================================| 99%
|
|=================================================================| 100%
##
|
| | 0%
|
|=== | 4%
|
|===== | 8%
|
|====== | 9%
|
|======== | 12%
|
|======== | 13%
|
|========== | 15%
|
|========== | 16%
|
|=========== | 16%
|
|============ | 19%
|
|============= | 21%
|
|=============== | 22%
|
|=============== | 23%
|
|================= | 26%
|
|================= | 27%
|
|==================== | 30%
|
|===================== | 32%
|
|====================== | 34%
|
|======================= | 35%
|
|========================= | 38%
|
|=========================== | 42%
|
|============================= | 44%
|
|============================= | 45%
|
|============================== | 46%
|
|=============================== | 48%
|
|================================ | 49%
|
|================================== | 52%
|
|================================== | 53%
|
|=================================== | 53%
|
|=================================== | 54%
|
|===================================== | 58%
|
|====================================== | 58%
|
|====================================== | 59%
|
|======================================== | 62%
|
|========================================= | 63%
|
|========================================== | 64%
|
|============================================ | 68%
|
|============================================== | 71%
|
|=============================================== | 73%
|
|================================================ | 73%
|
|=================================================== | 78%
|
|==================================================== | 80%
|
|===================================================== | 81%
|
|===================================================== | 82%
|
|====================================================== | 83%
|
|======================================================= | 84%
|
|========================================================= | 87%
|
|========================================================== | 90%
|
|=========================================================== | 90%
|
|=========================================================== | 91%
|
|============================================================ | 92%
|
|============================================================== | 95%
|
|=============================================================== | 97%
|
|================================================================ | 99%
|
|=================================================================| 100%
##
|
| | 0%
|
|== | 3%
|
|=========== | 17%
|
|============== | 21%
|
|=============== | 23%
|
|==================== | 31%
|
|======================== | 37%
|
|========================== | 40%
|
|================================= | 51%
|
|=================================== | 54%
|
|============================================= | 70%
|
|================================================ | 74%
|
|================================================= | 75%
|
|================================================== | 77%
|
|========================================================= | 88%
|
|========================================================== | 90%
|
|============================================================ | 92%
|
|============================================================= | 94%
|
|=================================================================| 100%
##
|
| | 0%
|
|= | 2%
|
|=== | 5%
|
|==== | 6%
|
|===== | 7%
|
|====== | 10%
|
|======= | 11%
|
|======== | 12%
|
|========= | 15%
|
|========== | 16%
|
|=========== | 17%
|
|============= | 19%
|
|============== | 22%
|
|================= | 26%
|
|================= | 27%
|
|===================== | 33%
|
|======================= | 35%
|
|========================= | 39%
|
|========================== | 40%
|
|=========================== | 42%
|
|============================ | 43%
|
|============================= | 45%
|
|=============================== | 48%
|
|================================= | 51%
|
|================================== | 53%
|
|========================================= | 64%
|
|=========================================== | 66%
|
|================================================ | 73%
|
|================================================= | 76%
|
|===================================================== | 82%
|
|====================================================== | 83%
|
|=========================================================== | 91%
|
|============================================================= | 94%
|
|=============================================================== | 97%
|
|=================================================================| 100%
##
|
| | 0%
|
|= | 1%
|
|========== | 15%
|
|================ | 25%
|
|================== | 28%
|
|=================== | 30%
|
|======================= | 35%
|
|========================= | 38%
|
|============================== | 47%
|
|===================================== | 57%
|
|========================================= | 64%
|
|=========================================== | 67%
|
|================================================ | 74%
|
|================================================== | 76%
|
|==================================================== | 79%
|
|============================================================== | 95%
|
|================================================================ | 99%
|
|=================================================================| 100%
##
|
| | 0%
|
|== | 2%
|
|=========== | 17%
|
|=============== | 23%
|
|================= | 26%
|
|====================== | 34%
|
|======================== | 37%
|
|============================ | 43%
|
|============================== | 46%
|
|==================================== | 55%
|
|========================================= | 63%
|
|=========================================== | 67%
|
|=============================================== | 72%
|
|================================================= | 75%
|
|================================================= | 76%
|
|====================================================== | 84%
|
|========================================================== | 89%
|
|============================================================ | 92%
|
|============================================================= | 94%
|
|=================================================================| 100%
##
|
| | 0%
|
|= | 1%
|
|== | 3%
|
|=== | 4%
|
|====== | 9%
|
|======== | 12%
|
|=========== | 18%
|
|============ | 18%
|
|============== | 21%
|
|=============== | 23%
|
|================ | 24%
|
|================= | 26%
|
|=================== | 29%
|
|=================== | 30%
|
|==================== | 31%
|
|======================== | 37%
|
|========================== | 40%
|
|========================== | 41%
|
|=========================== | 41%
|
|=========================== | 42%
|
|============================== | 47%
|
|=============================== | 47%
|
|=============================== | 48%
|
|================================== | 53%
|
|===================================== | 57%
|
|======================================= | 60%
|
|======================================= | 61%
|
|========================================= | 64%
|
|========================================== | 65%
|
|============================================== | 71%
|
|=============================================== | 73%
|
|================================================ | 74%
|
|================================================== | 77%
|
|=================================================== | 79%
|
|======================================================= | 84%
|
|========================================================== | 90%
|
|============================================================ | 93%
|
|============================================================= | 94%
|
|============================================================== | 96%
|
|=============================================================== | 97%
|
|=================================================================| 100%
##
|
| | 0%
|
|== | 3%
|
|========== | 15%
|
|============= | 20%
|
|============== | 22%
|
|===================== | 33%
|
|======================= | 36%
|
|============================= | 45%
|
|============================== | 47%
|
|==================================== | 56%
|
|============================================ | 67%
|
|============================================= | 68%
|
|====================================================== | 84%
|
|========================================================= | 88%
|
|========================================================== | 89%
|
|============================================================= | 95%
|
|=================================================================| 100%
##
|
| | 0%
|
|= | 2%
|
|=============== | 23%
|
|=========================== | 42%
|
|================================= | 51%
|
|====================================== | 59%
|
|========================================= | 63%
|
|============================================= | 70%
|
|================================================= | 76%
|
|======================================================= | 84%
|
|=============================================================== | 97%
|
|=================================================================| 100%
##
|
| | 0%
|
| | 1%
|
|=== | 5%
|
|==== | 6%
|
|====== | 9%
|
|======= | 12%
|
|========= | 14%
|
|========= | 15%
|
|========== | 15%
|
|=========== | 18%
|
|============= | 20%
|
|============== | 21%
|
|================= | 25%
|
|================= | 26%
|
|=================== | 29%
|
|===================== | 32%
|
|===================== | 33%
|
|======================= | 36%
|
|======================== | 37%
|
|========================= | 38%
|
|========================== | 40%
|
|=========================== | 42%
|
|============================ | 42%
|
|============================ | 43%
|
|============================ | 44%
|
|=============================== | 47%
|
|================================ | 49%
|
|================================= | 50%
|
|================================== | 52%
|
|=================================== | 54%
|
|===================================== | 56%
|
|====================================== | 59%
|
|======================================= | 60%
|
|======================================== | 62%
|
|========================================= | 63%
|
|========================================== | 64%
|
|=========================================== | 67%
|
|============================================= | 69%
|
|============================================== | 71%
|
|================================================= | 75%
|
|=================================================== | 78%
|
|=================================================== | 79%
|
|==================================================== | 79%
|
|===================================================== | 81%
|
|===================================================== | 82%
|
|======================================================= | 84%
|
|======================================================= | 85%
|
|======================================================== | 86%
|
|========================================================= | 88%
|
|========================================================== | 89%
|
|============================================================ | 93%
|
|============================================================== | 95%
|
|============================================================== | 96%
|
|=============================================================== | 97%
|
|================================================================ | 98%
|
|=================================================================| 100%
##
|
| | 0%
|
|=== | 5%
|
|==== | 6%
|
|====== | 8%
|
|======== | 12%
|
|======== | 13%
|
|========= | 13%
|
|=========== | 17%
|
|============ | 19%
|
|============= | 20%
|
|============== | 21%
|
|=============== | 22%
|
|=============== | 23%
|
|================= | 26%
|
|================== | 28%
|
|=================== | 29%
|
|==================== | 31%
|
|===================== | 32%
|
|===================== | 33%
|
|====================== | 33%
|
|====================== | 34%
|
|======================= | 36%
|
|======================== | 37%
|
|========================= | 38%
|
|========================== | 40%
|
|=========================== | 42%
|
|============================ | 43%
|
|============================== | 46%
|
|=============================== | 48%
|
|================================ | 49%
|
|================================= | 50%
|
|================================= | 51%
|
|=================================== | 53%
|
|==================================== | 56%
|
|===================================== | 57%
|
|======================================= | 59%
|
|======================================== | 62%
|
|========================================= | 64%
|
|========================================== | 64%
|
|========================================== | 65%
|
|============================================ | 68%
|
|============================================== | 71%
|
|=============================================== | 72%
|
|=============================================== | 73%
|
|================================================ | 74%
|
|================================================== | 77%
|
|=================================================== | 79%
|
|==================================================== | 80%
|
|====================================================== | 83%
|
|======================================================= | 85%
|
|======================================================== | 86%
|
|========================================================= | 88%
|
|=========================================================== | 91%
|
|=========================================================== | 92%
|
|============================================================= | 94%
|
|============================================================== | 95%
|
|=============================================================== | 97%
|
|=================================================================| 99%
|
|=================================================================| 100%
##
|
| | 0%
|
| | 1%
|
|=== | 5%
|
|==== | 6%
|
|===== | 8%
|
|====== | 10%
|
|======= | 12%
|
|======== | 12%
|
|========= | 13%
|
|========== | 15%
|
|========== | 16%
|
|============ | 19%
|
|============= | 19%
|
|============== | 21%
|
|============== | 22%
|
|=============== | 24%
|
|================= | 26%
|
|================== | 28%
|
|=================== | 29%
|
|==================== | 30%
|
|====================== | 33%
|
|======================= | 35%
|
|======================= | 36%
|
|======================== | 37%
|
|========================= | 38%
|
|========================== | 40%
|
|=========================== | 41%
|
|============================ | 43%
|
|============================== | 45%
|
|=============================== | 47%
|
|=============================== | 48%
|
|================================ | 49%
|
|================================== | 52%
|
|==================================== | 55%
|
|==================================== | 56%
|
|====================================== | 58%
|
|====================================== | 59%
|
|======================================= | 60%
|
|========================================= | 62%
|
|========================================== | 65%
|
|=========================================== | 66%
|
|=========================================== | 67%
|
|============================================= | 69%
|
|============================================== | 71%
|
|=============================================== | 72%
|
|================================================ | 74%
|
|================================================= | 75%
|
|================================================== | 77%
|
|=================================================== | 79%
|
|==================================================== | 80%
|
|===================================================== | 81%
|
|===================================================== | 82%
|
|======================================================= | 84%
|
|======================================================== | 86%
|
|========================================================= | 88%
|
|========================================================== | 90%
|
|=========================================================== | 91%
|
|============================================================= | 93%
|
|============================================================== | 95%
|
|=============================================================== | 97%
|
|================================================================ | 98%
|
|=================================================================| 100%
##
|
| | 0%
|
| | 1%
|
|= | 1%
|
|= | 2%
|
|== | 3%
|
|== | 4%
|
|=== | 4%
|
|=== | 5%
|
|==== | 6%
|
|==== | 7%
|
|===== | 7%
|
|===== | 8%
|
|====== | 8%
|
|====== | 9%
|
|====== | 10%
|
|======= | 10%
|
|======= | 11%
|
|======== | 12%
|
|======== | 13%
|
|========= | 13%
|
|========= | 14%
|
|========= | 15%
|
|========== | 15%
|
|========== | 16%
|
|=========== | 16%
|
|=========== | 17%
|
|=========== | 18%
|
|============ | 18%
|
|============ | 19%
|
|============= | 19%
|
|============= | 20%
|
|============= | 21%
|
|============== | 21%
|
|============== | 22%
|
|=============== | 22%
|
|=============== | 23%
|
|=============== | 24%
|
|================ | 24%
|
|================ | 25%
|
|================= | 26%
|
|================= | 27%
|
|================== | 27%
|
|================== | 28%
|
|=================== | 29%
|
|=================== | 30%
|
|==================== | 30%
|
|==================== | 31%
|
|===================== | 32%
|
|===================== | 33%
|
|====================== | 33%
|
|====================== | 34%
|
|====================== | 35%
|
|======================= | 35%
|
|======================= | 36%
|
|======================== | 36%
|
|======================== | 37%
|
|======================== | 38%
|
|========================= | 38%
|
|========================= | 39%
|
|========================== | 39%
|
|========================== | 40%
|
|========================== | 41%
|
|=========================== | 41%
|
|=========================== | 42%
|
|============================ | 42%
|
|============================ | 43%
|
|============================ | 44%
|
|============================= | 44%
|
|============================= | 45%
|
|============================== | 46%
|
|============================== | 47%
|
|=============================== | 47%
|
|=============================== | 48%
|
|================================ | 49%
|
|================================ | 50%
|
|================================= | 50%
|
|================================= | 51%
|
|================================== | 52%
|
|================================== | 53%
|
|=================================== | 53%
|
|=================================== | 54%
|
|==================================== | 55%
|
|==================================== | 56%
|
|===================================== | 57%
|
|====================================== | 58%
|
|====================================== | 59%
|
|======================================= | 59%
|
|======================================= | 60%
|
|======================================= | 61%
|
|======================================== | 61%
|
|======================================== | 62%
|
|========================================= | 63%
|
|========================================= | 64%
|
|========================================== | 64%
|
|========================================== | 65%
|
|=========================================== | 66%
|
|=========================================== | 67%
|
|============================================ | 68%
|
|============================================= | 69%
|
|============================================= | 70%
|
|============================================== | 70%
|
|============================================== | 71%
|
|=============================================== | 72%
|
|=============================================== | 73%
|
|================================================ | 73%
|
|================================================ | 74%
|
|================================================= | 75%
|
|================================================= | 76%
|
|================================================== | 77%
|
|================================================== | 78%
|
|=================================================== | 78%
|
|=================================================== | 79%
|
|==================================================== | 79%
|
|==================================================== | 80%
|
|==================================================== | 81%
|
|===================================================== | 81%
|
|===================================================== | 82%
|
|====================================================== | 83%
|
|====================================================== | 84%
|
|======================================================= | 84%
|
|======================================================= | 85%
|
|======================================================== | 86%
|
|======================================================== | 87%
|
|========================================================= | 88%
|
|========================================================== | 89%
|
|========================================================== | 90%
|
|=========================================================== | 90%
|
|=========================================================== | 91%
|
|============================================================ | 92%
|
|============================================================ | 93%
|
|============================================================= | 93%
|
|============================================================= | 94%
|
|============================================================= | 95%
|
|============================================================== | 95%
|
|============================================================== | 96%
|
|=============================================================== | 96%
|
|=============================================================== | 97%
|
|================================================================ | 98%
|
|================================================================ | 99%
|
|=================================================================| 99%
|
|=================================================================| 100%
##
|
| | 0%
|
|== | 3%
|
|=== | 4%
|
|=== | 5%
|
|==== | 6%
|
|===== | 7%
|
|===== | 8%
|
|====== | 9%
|
|======= | 11%
|
|======== | 12%
|
|======== | 13%
|
|========= | 14%
|
|========== | 15%
|
|=========== | 17%
|
|============= | 20%
|
|============== | 22%
|
|================ | 25%
|
|================== | 27%
|
|=================== | 29%
|
|==================== | 31%
|
|==================== | 32%
|
|===================== | 32%
|
|====================== | 33%
|
|====================== | 34%
|
|======================= | 35%
|
|======================= | 36%
|
|======================== | 37%
|
|========================= | 38%
|
|========================= | 39%
|
|========================== | 40%
|
|========================== | 41%
|
|=========================== | 41%
|
|=========================== | 42%
|
|============================ | 42%
|
|============================ | 43%
|
|============================ | 44%
|
|============================= | 44%
|
|============================= | 45%
|
|============================== | 46%
|
|============================== | 47%
|
|=============================== | 48%
|
|================================ | 49%
|
|================================= | 50%
|
|================================= | 51%
|
|================================== | 52%
|
|================================== | 53%
|
|=================================== | 54%
|
|==================================== | 55%
|
|===================================== | 57%
|
|====================================== | 58%
|
|====================================== | 59%
|
|======================================= | 60%
|
|======================================== | 61%
|
|========================================= | 63%
|
|========================================= | 64%
|
|========================================== | 65%
|
|=========================================== | 66%
|
|============================================ | 67%
|
|============================================= | 69%
|
|============================================== | 71%
|
|=============================================== | 72%
|
|=============================================== | 73%
|
|================================================= | 75%
|
|================================================= | 76%
|
|================================================== | 76%
|
|=================================================== | 78%
|
|==================================================== | 79%
|
|==================================================== | 80%
|
|===================================================== | 81%
|
|====================================================== | 83%
|
|======================================================= | 84%
|
|======================================================= | 85%
|
|======================================================== | 86%
|
|======================================================== | 87%
|
|========================================================= | 87%
|
|========================================================== | 89%
|
|========================================================== | 90%
|
|============================================================ | 92%
|
|============================================================ | 93%
|
|============================================================= | 94%
|
|============================================================== | 95%
|
|============================================================== | 96%
|
|=============================================================== | 97%
|
|================================================================ | 99%
|
|=================================================================| 100%
##
|
| | 0%
|
| | 1%
|
|=== | 5%
|
|==== | 6%
|
|==== | 7%
|
|======= | 11%
|
|========= | 13%
|
|========= | 14%
|
|=========== | 16%
|
|=========== | 17%
|
|============= | 19%
|
|============= | 20%
|
|============== | 21%
|
|============== | 22%
|
|================= | 26%
|
|=================== | 29%
|
|===================== | 33%
|
|====================== | 34%
|
|======================= | 36%
|
|======================== | 37%
|
|========================== | 39%
|
|========================== | 40%
|
|============================ | 43%
|
|============================ | 44%
|
|============================= | 45%
|
|=============================== | 47%
|
|================================ | 49%
|
|================================ | 50%
|
|================================= | 51%
|
|================================== | 53%
|
|=================================== | 54%
|
|==================================== | 55%
|
|===================================== | 58%
|
|======================================= | 60%
|
|======================================== | 61%
|
|======================================== | 62%
|
|========================================= | 63%
|
|=========================================== | 66%
|
|============================================ | 67%
|
|============================================ | 68%
|
|============================================== | 71%
|
|================================================ | 73%
|
|================================================= | 75%
|
|================================================= | 76%
|
|=================================================== | 78%
|
|=================================================== | 79%
|
|===================================================== | 81%
|
|===================================================== | 82%
|
|======================================================= | 84%
|
|======================================================= | 85%
|
|======================================================== | 86%
|
|========================================================== | 90%
|
|=========================================================== | 91%
|
|============================================================ | 92%
|
|============================================================ | 93%
|
|============================================================= | 94%
|
|=============================================================== | 97%
|
|================================================================ | 99%
|
|=================================================================| 100%
##
|
| | 0%
|
| | 1%
|
|= | 1%
|
|= | 2%
|
|== | 2%
|
|== | 3%
|
|== | 4%
|
|=== | 4%
|
|=== | 5%
|
|==== | 5%
|
|==== | 6%
|
|==== | 7%
|
|===== | 7%
|
|===== | 8%
|
|====== | 9%
|
|====== | 10%
|
|======= | 10%
|
|======= | 11%
|
|======== | 12%
|
|======== | 13%
|
|========= | 13%
|
|========= | 14%
|
|========== | 15%
|
|========== | 16%
|
|=========== | 16%
|
|=========== | 17%
|
|=========== | 18%
|
|============ | 18%
|
|============ | 19%
|
|============= | 19%
|
|============= | 20%
|
|============= | 21%
|
|============== | 21%
|
|============== | 22%
|
|=============== | 22%
|
|=============== | 23%
|
|=============== | 24%
|
|================ | 24%
|
|================ | 25%
|
|================= | 25%
|
|================= | 26%
|
|================= | 27%
|
|================== | 27%
|
|================== | 28%
|
|=================== | 29%
|
|=================== | 30%
|
|==================== | 30%
|
|==================== | 31%
|
|==================== | 32%
|
|===================== | 32%
|
|===================== | 33%
|
|====================== | 33%
|
|====================== | 34%
|
|======================= | 35%
|
|======================= | 36%
|
|======================== | 36%
|
|======================== | 37%
|
|======================== | 38%
|
|========================= | 38%
|
|========================= | 39%
|
|========================== | 39%
|
|========================== | 40%
|
|========================== | 41%
|
|=========================== | 41%
|
|=========================== | 42%
|
|============================ | 42%
|
|============================ | 43%
|
|============================ | 44%
|
|============================= | 44%
|
|============================= | 45%
|
|============================== | 45%
|
|============================== | 46%
|
|============================== | 47%
|
|=============================== | 47%
|
|=============================== | 48%
|
|================================ | 49%
|
|================================ | 50%
|
|================================= | 50%
|
|================================= | 51%
|
|================================== | 52%
|
|================================== | 53%
|
|=================================== | 53%
|
|=================================== | 54%
|
|=================================== | 55%
|
|==================================== | 55%
|
|==================================== | 56%
|
|===================================== | 56%
|
|===================================== | 57%
|
|===================================== | 58%
|
|====================================== | 58%
|
|====================================== | 59%
|
|======================================= | 59%
|
|======================================= | 60%
|
|======================================= | 61%
|
|======================================== | 61%
|
|======================================== | 62%
|
|========================================= | 62%
|
|========================================= | 63%
|
|========================================= | 64%
|
|========================================== | 64%
|
|========================================== | 65%
|
|=========================================== | 66%
|
|=========================================== | 67%
|
|============================================ | 67%
|
|============================================ | 68%
|
|============================================= | 69%
|
|============================================= | 70%
|
|============================================== | 70%
|
|============================================== | 71%
|
|=============================================== | 72%
|
|=============================================== | 73%
|
|================================================ | 73%
|
|================================================ | 74%
|
|================================================ | 75%
|
|================================================= | 75%
|
|================================================= | 76%
|
|================================================== | 76%
|
|================================================== | 77%
|
|================================================== | 78%
|
|=================================================== | 78%
|
|=================================================== | 79%
|
|==================================================== | 79%
|
|==================================================== | 80%
|
|==================================================== | 81%
|
|===================================================== | 81%
|
|===================================================== | 82%
|
|====================================================== | 82%
|
|====================================================== | 83%
|
|====================================================== | 84%
|
|======================================================= | 84%
|
|======================================================= | 85%
|
|======================================================== | 86%
|
|======================================================== | 87%
|
|========================================================= | 87%
|
|========================================================= | 88%
|
|========================================================== | 89%
|
|========================================================== | 90%
|
|=========================================================== | 90%
|
|=========================================================== | 91%
|
|============================================================ | 92%
|
|============================================================ | 93%
|
|============================================================= | 93%
|
|============================================================= | 94%
|
|============================================================= | 95%
|
|============================================================== | 95%
|
|============================================================== | 96%
|
|=============================================================== | 96%
|
|=============================================================== | 97%
|
|=============================================================== | 98%
|
|================================================================ | 98%
|
|================================================================ | 99%
|
|=================================================================| 100%
##
|
| | 0%
|
|= | 1%
|
|==== | 6%
|
|====== | 9%
|
|======= | 11%
|
|========= | 14%
|
|========== | 16%
|
|=========== | 17%
|
|============= | 20%
|
|============== | 21%
|
|=============== | 22%
|
|================ | 24%
|
|================= | 26%
|
|================== | 28%
|
|==================== | 31%
|
|===================== | 33%
|
|====================== | 34%
|
|======================= | 35%
|
|=========================== | 41%
|
|============================== | 46%
|
|================================== | 52%
|
|===================================== | 57%
|
|========================================= | 63%
|
|============================================ | 68%
|
|================================================ | 74%
|
|================================================= | 76%
|
|================================================== | 77%
|
|=================================================== | 78%
|
|=================================================== | 79%
|
|==================================================== | 80%
|
|===================================================== | 81%
|
|====================================================== | 83%
|
|====================================================== | 84%
|
|========================================================= | 88%
|
|============================================================ | 92%
|
|============================================================ | 93%
|
|============================================================= | 94%
|
|============================================================== | 95%
|
|============================================================== | 96%
|
|=============================================================== | 97%
|
|================================================================ | 98%
|
|================================================================ | 99%
|
|=================================================================| 99%
|
|=================================================================| 100%
##
|
| | 0%
|
|= | 2%
|
|====== | 9%
|
|======= | 11%
|
|======== | 12%
|
|========== | 16%
|
|============= | 20%
|
|============== | 21%
|
|============== | 22%
|
|=============== | 23%
|
|================== | 27%
|
|=================== | 30%
|
|===================== | 32%
|
|======================= | 35%
|
|======================== | 37%
|
|=========================== | 42%
|
|============================= | 45%
|
|============================== | 47%
|
|================================ | 49%
|
|================================= | 50%
|
|================================== | 52%
|
|=================================== | 54%
|
|====================================== | 58%
|
|======================================== | 61%
|
|=========================================== | 66%
|
|============================================ | 67%
|
|============================================= | 69%
|
|================================================ | 74%
|
|================================================== | 77%
|
|=================================================== | 78%
|
|===================================================== | 81%
|
|======================================================= | 84%
|
|======================================================== | 86%
|
|========================================================= | 88%
|
|============================================================= | 94%
|
|================================================================ | 98%
|
|=================================================================| 100%
##
|
| | 0%
|
|= | 1%
|
|==== | 7%
|
|===== | 8%
|
|======= | 12%
|
|========== | 15%
|
|========== | 16%
|
|=========== | 17%
|
|============== | 21%
|
|=============== | 24%
|
|================ | 25%
|
|================= | 26%
|
|================== | 28%
|
|==================== | 31%
|
|===================== | 32%
|
|===================== | 33%
|
|======================== | 37%
|
|========================== | 41%
|
|============================ | 43%
|
|============================== | 46%
|
|=============================== | 48%
|
|================================= | 51%
|
|================================== | 52%
|
|================================== | 53%
|
|=================================== | 53%
|
|===================================== | 57%
|
|====================================== | 59%
|
|======================================= | 60%
|
|======================================== | 61%
|
|========================================== | 64%
|
|=========================================== | 67%
|
|============================================ | 68%
|
|============================================== | 71%
|
|================================================= | 75%
|
|================================================= | 76%
|
|==================================================== | 79%
|
|====================================================== | 82%
|
|====================================================== | 83%
|
|========================================================= | 87%
|
|========================================================== | 90%
|
|=========================================================== | 90%
|
|============================================================ | 92%
|
|============================================================= | 95%
|
|=============================================================== | 97%
|
|=================================================================| 100%
##
|
| | 0%
|
| | 1%
|
|=== | 5%
|
|==== | 6%
|
|==== | 7%
|
|===== | 8%
|
|====== | 10%
|
|======= | 10%
|
|======= | 11%
|
|======== | 13%
|
|========== | 15%
|
|=========== | 17%
|
|============ | 18%
|
|============= | 20%
|
|============== | 21%
|
|=============== | 23%
|
|================ | 24%
|
|================ | 25%
|
|================= | 26%
|
|================== | 28%
|
|=================== | 29%
|
|==================== | 30%
|
|===================== | 32%
|
|====================== | 34%
|
|======================= | 35%
|
|======================== | 37%
|
|========================= | 38%
|
|========================== | 40%
|
|=========================== | 41%
|
|============================ | 43%
|
|============================= | 44%
|
|============================== | 47%
|
|================================ | 49%
|
|================================ | 50%
|
|================================== | 52%
|
|=================================== | 54%
|
|==================================== | 55%
|
|===================================== | 56%
|
|====================================== | 58%
|
|====================================== | 59%
|
|======================================= | 60%
|
|======================================= | 61%
|
|========================================= | 63%
|
|========================================== | 65%
|
|============================================ | 68%
|
|============================================= | 69%
|
|=============================================== | 72%
|
|================================================ | 74%
|
|================================================= | 75%
|
|================================================= | 76%
|
|==================================================== | 80%
|
|====================================================== | 83%
|
|======================================================= | 84%
|
|======================================================= | 85%
|
|======================================================== | 86%
|
|========================================================== | 89%
|
|=========================================================== | 91%
|
|============================================================ | 93%
|
|============================================================= | 93%
|
|=============================================================== | 97%
|
|================================================================ | 98%
|
|=================================================================| 100%
##
|
| | 0%
|
|= | 1%
|
|========= | 14%
|
|========== | 15%
|
|=========== | 17%
|
|==================== | 30%
|
|========================= | 38%
|
|============================ | 43%
|
|===================================== | 56%
|
|=========================================== | 66%
|
|============================================ | 67%
|
|============================================ | 68%
|
|============================================= | 69%
|
|====================================================== | 83%
|
|=============================================================== | 96%
|
|=================================================================| 100%
# Base layer: the state polygons held in `state`, drawn with a dark fill and
# a thin light outline.
g <- ggplot() +
  geom_sf(
    data = state,
    fill = "#2D3E50",
    color = "#FEBF57",
    size = 0.15,
    show.legend = FALSE
  )

# all states
# Overlay one jittered, semi-transparent point per row of `ordergeotable`,
# coloured by customer state, with one facet per value of `year`.
a <- g +
  geom_point(
    data = ordergeotable,
    mapping = aes(
      x = geolocation_lng,
      y = geolocation_lat,
      color = customer_state
    ),
    position = "jitter",
    size = 0.15,
    alpha = 0.5
  ) +
  # Restrict the visible window to the longitude/latitude box below.
  coord_sf(xlim = c(-70, -30), ylim = c(-40, 5), expand = FALSE) +
  facet_wrap(~year) +
  labs(
    title = "Mapping of different states of brazil in Map",
    x = "longitude of location.",
    y = "Latitude of location."
  )
plot(a)
# Number of rows in `ordertable` per customer_unique_id.
freq_count <- as.data.frame(table(ordertable$customer_unique_id))

# Count the distinct customers that appear in each year/month.
# `distinct()` + `count()` replace the superseded `*_at()` verbs and the
# deprecated `count_()`; `count()` already returns rows ordered by year and
# month, so no explicit arrange is needed.
# NOTE(review): this counts every distinct customer active in a month, not
# only first-time customers, although the plot labels below say "new".
odtbl <- ordertable %>%
  distinct(year, month, customer_unique_id) %>%
  count(year, month)

# Drop September and October 2018 from the trend
# (presumably incomplete months in the data -- TODO confirm).
odtbl <- odtbl %>%
  filter(!((month == "09" | month == "10") & year == "2018"))

# Line + point chart of the monthly counts, one panel per year.
odd <- odtbl %>%
  ggplot(mapping = aes(x = month, y = n, group = 1)) +
  geom_line(show.legend = FALSE) +
  geom_point() +
  facet_wrap(~year) +
  labs(
    x = "Month",
    y = "Count of new customers added each month",
    title = "New customers purchased every consecutive month"
  ) +
  theme(axis.text.y = element_text(angle = 90))
ggplotly(odd)
# Total item price per purchase month ("YYYY-MM").
# The join key is spelled out (matching the other order_ID joins in this file)
# instead of relying on dplyr guessing the common columns.
a <- items_data %>%
  left_join(orders_data, by = "order_ID") %>%
  mutate(
    mny = format(
      strptime(order_purchase_timestamp, "%Y-%m-%d %H:%M:%S"),
      "%Y-%m"
    )
  ) %>%
  group_by(mny) %>%
  summarise(total = sum(price)) %>%
  arrange(mny)

# One row per month with all holiday names of that month joined by commas.
# (The column keeps its original name `holidays_by_week` even though the
# grouping is by month.)
brazil_holidays <- brazil_holidays_data %>%
  mutate(mny = strftime(Date, format = "%Y-%m")) %>%
  group_by(mny) %>%
  summarise(holidays_by_week = paste0(Holiday, collapse = ","))

# Monthly sales as a line, with a fixed-height yellow bar marking each month
# that contains at least one holiday; hovering a bar lists the holidays.
p <- plot_ly(a, x = ~mny, y = ~total, type = "scatter", mode = "lines")
p <- p %>%
  add_trace(
    type = "bar",
    x = brazil_holidays$mny,
    y = 1000000,
    text = brazil_holidays$holidays_by_week,
    hoverinfo = "text",
    marker = list(color = "yellow"),
    showlegend = FALSE,
    width = 0.3
  ) %>%
  layout(
    title = "Monthly sale along with yearly holidays in Brazil",
    xaxis = list(
      title = "Time (Year - Month)",
      autotick = FALSE,
      dtick = 1
    ),
    yaxis = list(title = "Total Purchase (in $)")
  )
p
Clearly, sales increase significantly in months that contain a holiday or event.
# Number of orders per (weekday, hour-of-day) cell.
# `wday()` numbers the days 1..7; the tick labels below map 1 to Sunday.
# The result is ungrouped so later steps do not inherit the weekday grouping.
order_weekday <- orders_data %>%
  mutate(
    purchase_weekday = wday(order_purchase_timestamp),
    purchase_hour = format(
      strptime(order_purchase_timestamp, "%Y-%m-%d %H:%M:%S"),
      "%H"
    )
  ) %>%
  group_by(purchase_weekday, purchase_hour) %>%
  summarise(total_transactions = n()) %>%
  ungroup()

# Heatmap of transactions by hour (x) and weekday (y), with the count printed
# inside every cell.
p <- plot_ly(
  data = order_weekday,
  x = ~purchase_hour,
  y = ~purchase_weekday,
  z = ~total_transactions,
  type = "heatmap",
  width = 1050,
  height = 500,
  colors = colorRamp(c("white", "yellow", "red"))
) %>%
  layout(
    title = "Transactions over the hour by day",
    xaxis = list(title = "Hour"),
    yaxis = list(
      title = "Day",
      tickvals = c(1, 2, 3, 4, 5, 6, 7),
      ticktext = c(
        "Sunday", "Monday", "Tuesday", "Wednesday",
        "Thursday", "Friday", "Saturday"
      )
    )
  ) %>%
  add_annotations(
    x = order_weekday$purchase_hour,
    y = order_weekday$purchase_weekday,
    text = order_weekday$total_transactions,
    xref = "x",
    yref = "y",
    showarrow = FALSE,
    font = list(color = "black")
  ) %>%
  # A heatmap's colour scale is a colorbar, not a legend, so the title has to
  # be set through colorbar() to be displayed.
  colorbar(title = "Total Transactions")
p
The heatmap indicates that the majority of transactions occur on weekdays during office hours. We can therefore focus our marketing efforts on this period.
# Total payment value per payment type, ignoring "not_defined" payments.
payment_sum <- payments_data %>%
  filter(payment_type != "not_defined") %>%
  group_by(payment_type) %>%
  summarise(sum = sum(payment_value))

# Number of payment rows per payment type, ignoring "not_defined" payments.
payment_count <- payments_data %>%
  filter(payment_type != "not_defined") %>%
  count(payment_type, name = "count")

# Two pies side by side: payment counts on the left, payment value on the
# right. Axes are hidden because pies do not use them.
p <- plot_ly() %>%
  add_pie(
    data = payment_count,
    labels = ~payment_type,
    values = ~count,
    domain = list(x = c(0, 0.4), y = c(0.4, 1))
  ) %>%
  add_pie(
    data = payment_sum,
    labels = ~payment_type,
    values = ~sum,
    domain = list(x = c(0.6, 1), y = c(0.4, 1))
  ) %>%
  layout(
    title = "Number of payments vs Total payment values",
    showlegend = FALSE,
    xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
    yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE)
  )
p
The market in Brazil is heavily dominated by credit card payments. Credit cards are used in ~74% of all transactions and account for ~78% of Olist's total revenue.
# One row per (order, payment type) with the number of payment rows behind it.
# Ungrouped right away: leaving the per-order grouping in place would make the
# mutate below run once per order instead of once for the whole column.
payment_group <- payments_data %>%
  filter(payment_type != "not_defined") %>%
  group_by(order_ID, payment_type) %>%
  summarise(count = n()) %>%
  ungroup()

# Attach the purchase timestamp and derive the purchase month ("YYYY-MM").
# The join key is explicit, matching the other order_ID joins in this file.
payment_order_group <- payment_group %>%
  left_join(orders_data, by = "order_ID") %>%
  select(order_ID, payment_type, count, order_purchase_timestamp) %>%
  mutate(
    purchase_mny = format(
      strptime(order_purchase_timestamp, "%Y-%m-%d %H:%M:%S"),
      "%Y-%m"
    )
  )

# Number of (order, payment type) rows per month and payment type.
payment_abc <- payment_order_group %>%
  group_by(purchase_mny, payment_type) %>%
  summarise(total_count = n()) %>%
  ungroup()

# One line per payment type showing its monthly transaction count.
p <- plot_ly(
  payment_abc,
  x = ~purchase_mny,
  y = ~total_count,
  color = ~payment_type,
  type = "scatter",
  mode = "lines+markers"
) %>%
  layout(
    title = "Payment type growth monthly",
    xaxis = list(title = "Time (Year - Month)"),
    yaxis = list(title = "Total number of transactions")
  )
p
# Keep the first geolocation row for every (state, zip prefix, city)
# combination, retaining all of its columns.
geo_data <- geolocation_data %>%
  distinct(
    geolocation_state,
    geolocation_zip_code_prefix,
    geolocation_city,
    .keep_all = TRUE
  )

# customer's geo location data
# Match each customer to a geolocation row on state, city and zip prefix,
# prefix the coordinate columns with "customer_", and drop customers for whom
# no coordinates were found.
customers_geo_data <- customers_data %>%
  left_join(
    geo_data,
    by = c(
      "customer_state" = "geolocation_state",
      "customer_city" = "geolocation_city",
      "customer_zip_code_prefix" = "geolocation_zip_code_prefix"
    )
  ) %>%
  rename(
    customer_geolocation_lat = geolocation_lat,
    customer_geolocation_lng = geolocation_lng
  ) %>%
  drop_na(customer_geolocation_lat, customer_geolocation_lng)

# seller's geo location data
# Same matching for sellers, with the coordinate columns prefixed "seller_".
sellers_geo_data <- sellers_data %>%
  left_join(
    geo_data,
    by = c(
      "seller_state" = "geolocation_state",
      "seller_city" = "geolocation_city",
      "seller_zip_code_prefix" = "geolocation_zip_code_prefix"
    )
  ) %>%
  rename(
    seller_geolocation_lat = geolocation_lat,
    seller_geolocation_lng = geolocation_lng
  ) %>%
  drop_na(seller_geolocation_lat, seller_geolocation_lng)
# Rounded difference in days between two timestamp columns, `later - earlier`.
# Both inputs are parsed with strptime(); each format defaults to the full
# "date time" layout and can be overridden for date-only columns.
days_between <- function(later, earlier,
                         later_format = "%Y-%m-%d %H:%M:%S",
                         earlier_format = "%Y-%m-%d %H:%M:%S") {
  round(
    difftime(
      strptime(later, format = later_format),
      strptime(earlier, format = earlier_format),
      units = "days"
    )
  )
}

# data manipulation data frame for sellers and orders delivered by these sellers
# One row per order item of a delivered order whose seller and customer both
# have coordinates, enriched with day-based durations and a late flag.
seller_orders_data <- items_data %>%
  left_join(sellers_geo_data, by = "seller_ID") %>%
  left_join(orders_data, by = "order_ID") %>%
  left_join(customers_geo_data, by = "customer_ID") %>%
  # Rows lacking any timestamp or coordinate used below are discarded.
  drop_na(
    order_approved_at,
    order_purchase_timestamp,
    order_delivered_carrier_date,
    order_delivered_customer_date,
    seller_geolocation_lat,
    seller_geolocation_lng,
    customer_geolocation_lat,
    customer_geolocation_lng
  ) %>%
  filter(order_status == "delivered") %>%
  mutate(
    # purchase -> approval
    approved_in_days = days_between(
      order_approved_at, order_purchase_timestamp
    ),
    # purchase -> delivery to the customer
    delivered_in_days = days_between(
      order_delivered_customer_date, order_purchase_timestamp
    ),
    # purchase -> estimated delivery date (parsed as a date only)
    estimated_in_days = days_between(
      order_estimated_delivery_date, order_purchase_timestamp,
      later_format = "%Y-%m-%d"
    ),
    # approval -> hand-over to the carrier
    del_to_carrier_in = days_between(
      order_delivered_carrier_date, order_approved_at
    ),
    # estimated delivery date -> actual delivery (positive means late)
    delay = days_between(
      order_delivered_customer_date, order_estimated_delivery_date,
      earlier_format = "%Y-%m-%d"
    )
  ) %>%
  # Split "YYYY-MM-DD HH:MM:SS" into year, month and "DD HH:MM:SS" ...
  separate(
    order_purchase_timestamp,
    into = c("purchase_year", "purchase_month", "purchase_date"),
    sep = "-"
  ) %>%
  # ... then split the remainder into day and time of day.
  separate(
    purchase_date,
    into = c("purchase_date", "purchase_time"),
    sep = " "
  ) %>%
  select(
    seller_ID, seller_city, seller_zip_code_prefix, seller_state,
    seller_geolocation_lat, seller_geolocation_lng,
    customer_ID, customer_city, customer_state, customer_zip_code_prefix,
    customer_geolocation_lat, customer_geolocation_lng,
    order_ID, product_ID,
    approved_in_days, delivered_in_days, estimated_in_days,
    del_to_carrier_in, delay,
    purchase_year, purchase_month, purchase_date
  ) %>%
  # Collapse the rounded delay into a two-level factor:
  # 0 = on time or early, 1 = late.
  mutate(delay = as.factor(if_else(delay <= 0, 0, 1)))
# Top 10 sellers
# Ranked by number of rows in `seller_orders_data` per seller; ties at the
# cut-off are kept by top_n().
top_sellers <- seller_orders_data %>%
  count(seller_ID, name = "total_orders") %>%
  top_n(10, total_orders)

# Top 10 Product Categories
# Add the English category name to every product, then attach the product
# details to every order item.
products_data_english <- products_data %>%
  left_join(translations_data, by = c("product_category_name"))
products_data_eng_items <- items_data %>%
  left_join(products_data_english, by = "product_ID")

# Count order items per English category (most frequent first) and keep the
# ten largest; top_n() without `wt` ranks by the last column, here `n`.
Top10_Product <- products_data_eng_items %>%
  filter(!is.na(product_category_name_english)) %>%
  count(product_category_name_english, sort = TRUE) %>%
  top_n(10)
# Yearly Analysis
# Rows of the top sellers only, enriched with the English product category.
# Categories outside the top-10 list are collapsed into "Others". The result
# stays grouped by seller, purchase year and purchase month.
yearly_product_sold <- seller_orders_data %>%
  filter(seller_ID %in% top_sellers$seller_ID) %>%
  left_join(products_data, by = "product_ID") %>%
  left_join(translations_data, by = "product_category_name") %>%
  group_by(seller_ID, purchase_year, purchase_month) %>%
  mutate(
    seller_name = paste0("Seller_", seller_ID),
    product_category_name_english = ifelse(
      product_category_name_english %in%
        Top10_Product$product_category_name_english,
      product_category_name_english,
      "Others"
    )
  )

# Plot
# Stacked bars of row counts per purchase year, filled by product category,
# with one panel per top seller.
top_seller_yearly_categories <- ggplot(
  yearly_product_sold,
  aes(
    x = purchase_year,
    fill = product_category_name_english,
    name = "Product Categories"
  )
) +
  geom_bar() +
  facet_grid(. ~ seller_name) +
  labs(x = "Year", y = "Product Count") +
  guides(fill = guide_legend("Product Categories"))
ggplotly(top_seller_yearly_categories)
# Plot all Customers and Sellers on Single Plot

# One row per seller (first occurrence in seller_orders_data), restricted to
# a latitude/longitude bounding box. The latitude is shifted by 0.5 before
# filtering -- presumably so the pin image sits just above the true location;
# TODO confirm. Every seller uses the same marker image, so `img` is a plain
# constant column rather than a random draw from a one-element vector.
seller_loc_data <- seller_orders_data %>%
  distinct(seller_ID, .keep_all = TRUE) %>%
  mutate(seller_geolocation_lat = seller_geolocation_lat + 0.5) %>%
  filter(seller_geolocation_lat > -34, seller_geolocation_lng > -70) %>%
  mutate(img = "location.png")

# One row per distinct customer coordinate pair, restricted to the same kind
# of bounding box.
# NOTE(review): customer_loc_data is not used by the map below, although the
# section title mentions customers -- confirm whether a customer layer is
# missing or whether it is consumed further down the file.
customer_loc_data <- customers_geo_data %>%
  distinct(customer_geolocation_lat, customer_geolocation_lng,
           .keep_all = TRUE) %>%
  filter(
    customer_geolocation_lat > -34,
    customer_geolocation_lat < 4,
    customer_geolocation_lng > -70
  )

# State outlines with a pin per seller, faceted by purchase year, and state
# abbreviations drawn on top.
seller_map <- ggplot() +
  geom_sf(
    data = state, fill = "grey", color = "brown", size = .15,
    show.legend = FALSE
  ) +
  geom_image(
    data = seller_loc_data,
    mapping = aes(
      x = seller_geolocation_lng,
      y = seller_geolocation_lat,
      image = img,
      label = rownames(seller_loc_data)
    ),
    size = 0.02
  ) +
  facet_wrap(~purchase_year) +
  geom_sf_label(
    data = state, aes(label = abbrev_state),
    label.padding = unit(0.15, "lines"), label.size = 0.05
  ) +
  labs(x = "Longitude", y = "Latitude")

seller_map
# Delivery distribution of top 10 sellers
# Per-row duration columns for the top sellers. The result stays grouped by
# seller_ID, exactly as before.
delivery_distribution <- top_sellers %>%
  left_join(seller_orders_data, by = "seller_ID") %>%
  group_by(seller_ID) %>%
  select(
    seller_ID, order_ID, approved_in_days,
    delivered_in_days, estimated_in_days, del_to_carrier_in
  ) %>%
  mutate(seller_name = paste0("Seller_", seller_ID))

# Average Delivery Estimation by top 10 Sellers
# na.rm = TRUE keeps a single missing duration from turning the whole
# seller's average into NA, which would break the line trace below.
mean_delivery_estimate <- delivery_distribution %>%
  group_by(seller_ID) %>%
  summarize(avg_estimated_days = mean(estimated_in_days, na.rm = TRUE)) %>%
  mutate(seller_name = paste0("Seller_", seller_ID))

# Average Delivery to Carrier
avg_carrier_del_days <- delivery_distribution %>%
  group_by(seller_ID) %>%
  summarize(mean_carrier_del = mean(del_to_carrier_in, na.rm = TRUE)) %>%
  mutate(seller_name = paste0("Seller_", seller_ID))

# Box plot for Delivery Distribution and Estimated Delivery
# Two line traces (average estimate, average hand-over to carrier) overlaid
# with one box per seller for delivered_in_days. The x axis is categorical
# and ordered as in mean_delivery_estimate.
# NOTE(review): `size = 8` inside the axis lists does not look like a plotly
# axis attribute; presumably tickfont = list(size = 8) was intended -- confirm.
delivery_dist_plot <- plot_ly(
  data = mean_delivery_estimate,
  x = ~seller_name, y = ~avg_estimated_days,
  type = "scatter", mode = "lines", name = "Estimated Delivery"
) %>%
  add_trace(
    data = avg_carrier_del_days,
    x = ~seller_name, y = ~mean_carrier_del,
    type = "scatter", mode = "lines", name = "Carrier Delivery"
  ) %>%
  add_boxplot(
    data = delivery_distribution,
    x = ~seller_name, y = ~delivered_in_days,
    color = ~seller_name, type = "box", name = ~seller_name
  ) %>%
  layout(
    xaxis = list(
      title = "Top 10 Sellers", type = "category",
      categoryorder = "array",
      categoryarray = mean_delivery_estimate$seller_name,
      size = 8
    ),
    yaxis = list(
      title = "Number of Days",
      range = c(0, 50),
      size = 8
    )
  )

delivery_dist_plot
# Customer locations served by each of the top sellers.
# Rows of the sellers with the ten largest row counts, then de-duplicated
# twice: first per seller/customer, then per seller/state/city/zip prefix.
# loc_data is grouped by seller_ID, customer_state and customer_city while
# distinct() runs, so those columns also take part in the de-duplication.
loc_data <- seller_orders_data %>%
  group_by(seller_ID) %>%
  summarise(products_sold = n()) %>%
  top_n(10, products_sold) %>%
  left_join(seller_orders_data, by = "seller_ID") %>%
  group_by(seller_ID, customer_state, customer_city) %>%
  mutate(seller_name = paste0("Seller_", seller_ID))
loc_data <- distinct(loc_data, seller_ID, customer_ID, .keep_all = TRUE)
loc_data <- distinct(
  loc_data,
  seller_ID, customer_state, customer_city, customer_zip_code_prefix,
  .keep_all = TRUE
)

# One marker row per seller location. The latitude is shifted by 1.5 --
# presumably so the pin image sits above the point; TODO confirm.
# The marker image is a constant column, so it always matches the row count.
# The previous sample(..., size = 10) assumed exactly ten rows, which fails
# when top_n() keeps ties or a seller has more than one coordinate pair.
top_seller_loc_data <- loc_data %>%
  mutate(seller_geolocation_lat = seller_geolocation_lat + 1.5) %>%
  group_by(seller_name, seller_geolocation_lat, seller_geolocation_lng) %>%
  count() %>%
  mutate(img = "location.png")

# Percentage of rows flagged as delayed (delay == 1) per seller.
delay_stats <- loc_data %>%
  group_by(seller_name) %>%
  summarise(prop = round((sum(delay == 1) / n()) * 100, 2))

# One facet per seller: state outlines, jittered customer points coloured by
# delay status, the seller's pin, and the delay percentage in a corner.
# NOTE(review): `size = 8` in labs() only sets the title of a (non-existent)
# size legend; it does not change any font size -- confirm intent.
seller_cust_map <- ggplot() +
  geom_sf(
    data = state, fill = "grey", color = "brown", size = .15,
    show.legend = FALSE
  ) +
  geom_point(
    data = loc_data,
    mapping = aes(
      x = customer_geolocation_lng,
      y = customer_geolocation_lat,
      color = delay
    ),
    position = "jitter", size = 1
  ) +
  geom_image(
    data = top_seller_loc_data,
    mapping = aes(
      x = seller_geolocation_lng,
      y = seller_geolocation_lat,
      image = img,
      label = rownames(top_seller_loc_data)
    ),
    size = 0.1
  ) +
  facet_wrap(~seller_name) +
  geom_text(
    data = delay_stats,
    mapping = aes(
      x = -Inf, y = -Inf,
      label = paste0("Delay = ", prop, "%")
    ),
    hjust = -0.1, vjust = -1
  ) +
  # Named values pin each colour to its factor level explicitly instead of
  # depending on how unnamed values are matched to breaks or limits.
  scale_color_manual(
    breaks = c("1", "0"), labels = c("Delayed", "On Time"),
    name = "Delivery Status",
    values = c("1" = "white", "0" = "blue")
  ) +
  labs(x = "Longitude", y = "Latitude", size = 8)

seller_cust_map